### Replication script for Figure 1 ###
library(tidyverse) # 1.3.2
library(srvyr) # 1.2.0
library(forcats) # 0.5.1
library(patchwork) # 1.3.0.9000
library(ggtext) # 0.1.2

load("lb_clean.RData")

# --- Prepare cleaned data for survey estimation
lb_svy <- lb_clean %>%
  drop_na(crim_gov, crim_presence) %>%
  group_by(stratum_id) %>%
  mutate(stratum_size = n()) %>%
  ungroup() %>%
  filter(stratum_size > 1) %>%
  as_survey_design(strata = stratum_id, weights = wt)

# Will need country populations
country_pops <- lb_clean %>% group_by(COUNTRY) %>% summarize(pop = first(pop)) %>% ungroup()

# --- Compute regional estimates
topline_presence <- lb_svy %>%
  # Country level estimates
  group_by(COUNTRY) %>%
  summarize(crim_gov = survey_mean(crim_gov, vartype = "ci"),
            crim_presence = survey_mean(crim_presence, vartype = "ci")) %>%
  ungroup() %>%
  left_join(country_pops) %>%
  # Regional topline estimate: compute country populations
  mutate(crim_gov_pop_low = crim_gov_low*pop, crim_gov_pop_high = crim_gov_upp*pop, crim_gov_pop = crim_gov*pop,
         crim_presence_pop_low = crim_presence_low*pop, crim_presence_pop_high = crim_presence_upp*pop, crim_presence_pop = crim_presence*pop) %>%
  # Aggregate populations and proportions to regional level
  summarize_at(c("crim_presence_pop_low", "crim_presence_pop_high", "crim_presence_pop", "pop"), ~ sum(.)) %>%
  mutate(crim_presence_low = crim_presence_pop_low / pop, crim_presence_high = crim_presence_pop_high / pop, crim_presence = crim_presence_pop / pop)

topline_gov <- lb_svy %>% 
  # Country level estimates
  group_by(COUNTRY) %>%
  summarize(crim_gov = survey_mean(crim_gov, vartype = "ci"),
            crim_presence = survey_mean(crim_presence, vartype = "ci")) %>%
  ungroup() %>%
  left_join(country_pops) %>%
  # Regional topline estimate: compute country populations
  mutate(crim_gov_pop_low = crim_gov_low*pop, crim_gov_pop_high = crim_gov_upp*pop, crim_gov_pop = crim_gov*pop,
         crim_presence_pop_low = crim_presence_low*pop, crim_presence_pop_high = crim_presence_upp*pop, crim_presence_pop = crim_presence*pop) %>%
  summarize_at(c("crim_gov_pop_low", "crim_gov_pop_high", "crim_gov_pop", "pop"), ~ sum(.)) %>%
  mutate(crim_gov_low = crim_gov_pop_low / pop, crim_gov_high = crim_gov_pop_high / pop, crim_gov = crim_gov_pop / pop)

# --- Calculate country-level estimates
regional_est <- lb_svy %>% 
  group_by(Country = COUNTRY) %>%
  summarize(crim_gov = survey_mean(crim_gov, vartype = "ci"),
            crim_presence = survey_mean(crim_presence, vartype = "ci")) %>%
  ungroup() %>%
  left_join(country_pops, by = c("Country" = "COUNTRY")) %>%
  # Add regional topline
  rbind(data.frame(Country = "Regional total", crim_gov = topline_gov$crim_gov, crim_gov_low = topline_gov$crim_gov_low, crim_gov_upp = topline_gov$crim_gov_high, 
                   crim_presence = topline_presence$crim_presence, crim_presence_low = topline_presence$crim_presence_low, crim_presence_upp = topline_presence$crim_presence_high, 
                   pop = sum(country_pops$pop))) %>%
  # Sort
  mutate(Country = factor(Country, levels = unique(Country[order(crim_gov, decreasing = FALSE)]), ordered = TRUE),
         Country = fct_relevel(Country, "Regional total", after = 0)) %>%
  # Generate population CIs
  mutate(crim_gov_pop_ci = paste0(round((crim_gov_low*pop) / 1000000, 1), "—", round((crim_gov_upp*pop) / 1000000, 1))) %>%
  # Fix Argentina formatting
  mutate(crim_gov_pop_ci = ifelse(Country == "Argentina", "2.0—3.7", crim_gov_pop_ci))


# Plot
p1 <- ggplot(regional_est) + 
  # Criminal governance CI and point estimate
  geom_linerange(aes(x = Country, ymin = crim_gov_low, ymax = crim_gov_upp), color = "darkred") +
  geom_label(
    aes(x = Country, y = crim_gov, label = sprintf(round(crim_gov, digits = 2) %>% str_replace("^0", "") %>% str_pad(side = "right", width = 3, pad = "0"))),
    position = position_dodge(width = 1),
    size = 1.75,
    label.padding = unit(0.1, "lines"), alpha = 0.95,
    family = "Times",
    color = "darkred",
    fontface = ifelse(regional_est$Country == "Regional total", "bold", "plain"),
    show.legend = F) +
  # Criminal presence CI and point estimate
  geom_linerange(aes(x = Country, ymin = crim_presence_low, ymax = crim_presence_upp), color = "dodgerblue2") +
  geom_label(
    aes(x = Country, y = crim_presence, label = sprintf(round(crim_presence, digits = 2) %>% str_replace("^0", "") %>% str_pad(side = "right", width = 3, pad = "0"))),
    position = position_dodge(width = 1),
    size = 1.75,
    label.padding = unit(0.1, "lines"), alpha = 0.95,
    color = "dodgerblue2",
    family = "Times",
    fontface = ifelse(regional_est$Country == "Regional total", "bold", "plain"),
    show.legend = F) +
  ## Labels, themes
  scale_y_continuous(breaks = seq(from = .1, to = .7, by = .1)) +
  labs(x = "", y = "Proportion of population", 
       title = "<br> <br>"
       ) +
  coord_flip() + theme_minimal() +
  theme(
    plot.title = element_markdown(lineheight = 1.1),
    legend.text = element_markdown(size = 11),
    axis.text.y = element_text(face = ifelse(levels(regional_est$Country) == "Regional total", "bold", "plain"), family = "Times"),
    axis.title.x = element_text(family = "Times")
  ) 

# Output
p2 <- ggplot(regional_est, aes(x = 0, y = Country)) + 
  geom_text(aes(label = crim_gov_pop_ci, color = "darkred"), fontface = ifelse((regional_est$Country) == "Regional total", "bold", "plain"), size = 3,
            family = "Times") +
  labs(x = NULL) +
  scale_color_identity() +
  theme_void() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )
plot <- p1 + p2 + plot_layout(widths = c(3,1))

ggsave(plot, file = "figure_1.pdf", height = 6, width = 7, bg = "white")

